* Import data
* Loads sheet "Sheet1" of bothsamples_new.xlsx; the first spreadsheet row supplies the
* variable names (firstrow) and any dataset currently in memory is discarded (clear).
import excel using "bothsamples_new.xlsx", sheet("Sheet1") firstrow clear

* Country-group indicators.
* Each of income / pop / dem equals 1 when country_isocode1 is in the matching list
* below and 0 for every other row (including rows with an empty country code).
* NOTE(review): presumably these mark the high-income, high-population and democratic
* country groups -- confirm against the sampling documentation.
local income_iso ARE AUS BEL BGR BHR CAN CHN DNK ITA KAZ MYS NZL OMN QAT ROU RUS SAU THA USA ZAF
local pop_iso    AUS BFA CAN CHN COD EGY GHA ITA KEN LKA MWI MYS NGA RUS SAU THA UGA USA YEM ZAF
local dem_iso    AUS BEL BFA BGR BOL CAN DNK GHA ITA LBR LKA MDA MWI NGA NZL ROU SEN TUN USA ZAF

foreach grp in income pop dem {
    gen `grp' = 0
    foreach iso of local `grp'_iso {
        quietly replace `grp' = 1 if country_isocode1 == "`iso'"
    }
}

* Show which countries ended up in each group
foreach grp in income pop dem {
    tab country_isocode1 if `grp'==1
}

* Names should be identical across the two coders: list every row where they differ
list if !(name1 == name2)

* Titles should be identical as well: list the mismatching pairs
list name1 title1 title2 if !(title1 == title2)

* Checking correlations in firstyear and lastyear:
* convert both coders' string years to numeric copies (suffix 11 / 22) and correlate them
foreach yr in firstyear lastyear {
    destring `yr'1, gen(`yr'11)
    destring `yr'2, gen(`yr'22)
    corr `yr'11 `yr'22
}



*** CALCULATING THE INTER-CODER RELIABILITY TESTS FOR TABLE G1 ***

* DEGREE (first core variable; include in the appendix from here on)
fre degree2

* "NA" is swapped for the placeholder 99 so destring can convert the column,
* then the placeholder is turned into a proper missing value.
replace degree2 = "99" if degree2=="NA"
destring degree1, gen(degree11)
destring degree2, gen(degree22)
replace degree22 = . if degree22==99

foreach v in degree11 degree22 {
    tab `v'
}
corr degree11 degree22

* Full sample: unweighted kappa, then weighted kappa.
* wgt(w) gives partial credit to near-misses, so ratings that are closer together
* count as smaller disagreements than ratings that are far apart.
kap degree11 degree22
kap degree11 degree22, wgt(w)

* Same two statistics within each type of country (group == 1 first, then group == 0)
foreach grp in income pop dem {
    foreach val in 1 0 {
        kap degree11 degree22 if `grp'==`val'
        kap degree11 degree22 if `grp'==`val', wgt(w)
    }
}





* DEGREETYPE
fre degreetype2

* "NA" -> placeholder 99 -> missing, so that destring can convert the column
replace degreetype2 = "99" if degreetype2=="NA"
destring degreetype1, gen(degreetype11)
destring degreetype2, gen(degreetype22)
replace degreetype22 = . if degreetype22==99

foreach v in degreetype11 degreetype22 {
    fre `v'
}
corr degreetype11 degreetype22

* Full-sample kappa
kap degreetype11 degreetype22

* Inspect the free-text entries behind every disagreement
list degreetype_text1 degreetype_text2 if degreetype11 != degreetype22

* Kappa within each type of country (group == 1 first, then group == 0)
foreach grp in income pop dem {
    foreach val in 1 0 {
        kap degreetype11 degreetype22 if `grp'==`val'
    }
}





* UNIVERSITY: blank out "NA" for coder 2, then list every pair of entries that differ
fre university2
replace university2 = "" if university2=="NA"
list name1 university1 university2 if !(university1 == university2)



* ABROAD
* Coder 1: encode the string variable, then fold encoded values 3 and 4 into 1 and 2
encode abroad1, gen(abroad11)
fre abroad11
recode abroad11 (3 = 1) (4 = 2)
tab abroad11

* Coder 2: encoded value 3 becomes missing, 4 and 5 become 1 and 2
encode abroad2, gen(abroad22)
recode abroad22 (3 = .) (4 = 1) (5 = 2)
tab abroad22

* Full-sample kappa
kap abroad11 abroad22

* Kappa within each type of country (group == 1 first, then group == 0)
foreach grp in income pop dem {
    foreach val in 1 0 {
        kap abroad11 abroad22 if `grp'==`val'
    }
}




* Checking for ABROADCOUNTRY
fre abroadcountry1
fre abroadcountry2

* Reduce abroadcountry2 to its first listed country only (the original note says the
* first alphabetical country, as for abroadcountry1).
* The cut is made at the first comma, not at the first blank: taking word 1 would
* truncate multi-word names ("Sierra Leone" -> "Sierra", "South Africa" -> "South",
* "United Kingdom" -> "United"), and their codes below could then never be assigned.
gen abroadcountry2b = abroadcountry2
replace abroadcountry2b = substr(abroadcountry2b, 1, strpos(abroadcountry2b, ",") - 1) if strpos(abroadcountry2b, ",") > 0
replace abroadcountry2b = strtrim(abroadcountry2b)
tab abroadcountry2b

encode abroadcountry1, gen(abroadcountry11)
tab abroadcountry1
fre abroadcountry11

* Numeric country codes for coder 2, assigned by hand.
* NOTE(review): codes 1-25 are presumably meant to line up with the values encode
* gave abroadcountry11 -- check against the "fre abroadcountry11" output above.
gen abroadcountry22 = .
replace abroadcountry22 = 1 if abroadcountry2b=="Australia"
replace abroadcountry22 = 2 if abroadcountry2b=="Belgium"
replace abroadcountry22 = 3 if abroadcountry2b=="Bulgaria"
replace abroadcountry22 = 4 if abroadcountry2b=="Canada"
replace abroadcountry22 = 5 if abroadcountry2b=="Egypt"
replace abroadcountry22 = 6 if abroadcountry2b=="England"
replace abroadcountry22 = 7 if abroadcountry2b=="France"
replace abroadcountry22 = 8 if abroadcountry2b=="Germany"
replace abroadcountry22 = 9 if abroadcountry2b=="India"
replace abroadcountry22 = 10 if abroadcountry2b=="Iran"
replace abroadcountry22 = 11 if abroadcountry2b=="Lebanon"
replace abroadcountry22 = 12 if abroadcountry2b=="Pakistan"
replace abroadcountry22 = 13 if abroadcountry2b=="Romania"
replace abroadcountry22 = 14 if abroadcountry2b=="Russia"
replace abroadcountry22 = 15 if abroadcountry2b=="Senegal"
replace abroadcountry22 = 16 if abroadcountry2b=="Sierra Leone"
replace abroadcountry22 = 17 if abroadcountry2b=="Singapore"
replace abroadcountry22 = 18 if abroadcountry2b=="South Africa"
* Any spelling that starts with "Soviet" is one category (19); it was previously
* overwritten further down with a code that Spain and Syria also shared.
replace abroadcountry22 = 19 if abroadcountry2b=="Soviet Union (Russia)" | word(abroadcountry2b, 1)=="Soviet"
replace abroadcountry22 = 20 if abroadcountry2b=="Switzerland"
replace abroadcountry22 = 21 if abroadcountry2b=="Tanzania"
replace abroadcountry22 = 22 if abroadcountry2b=="USA"
replace abroadcountry22 = 23 if abroadcountry2b=="USSR"
replace abroadcountry22 = 24 if abroadcountry2b=="Uganda"
replace abroadcountry22 = 25 if abroadcountry2b=="United Kingdom"
replace abroadcountry22 = 26 if abroadcountry2b=="Botswana"
replace abroadcountry22 = 27 if abroadcountry2b=="Brazil"
replace abroadcountry22 = 28 if abroadcountry2b=="Ceylon"
* Matched on the first word so every spelling of the full name is caught
replace abroadcountry22 = 29 if word(abroadcountry2b, 1)=="Côte"
replace abroadcountry22 = 30 if abroadcountry2b=="Iraq"
replace abroadcountry22 = 31 if abroadcountry2b=="Italy"
replace abroadcountry22 = 32 if abroadcountry2b=="Saudi-Arabia"
* NOTE(review): 33 catches every remaining entry starting with "South" (South Africa
* has its own code 18 above) -- confirm which country or countries this covers.
replace abroadcountry22 = 33 if word(abroadcountry2b, 1)=="South" & abroadcountry2b!="South Africa"
* Spain and Syria each need their own code; they used to share 34
replace abroadcountry22 = 34 if abroadcountry2b=="Spain"
replace abroadcountry22 = 35 if abroadcountry2b=="Syria"
// coders have used USSR and Soviet Union, USA and United States. Need to make similar.

* PLACEOFBIRTH: tabulate both coders, then list and count the rows where they differ
foreach v in placeofbirth1 placeofbirth2 {
    fre `v'
}
list name1 placeofbirth1 placeofbirth2 if !(placeofbirth1 == placeofbirth2)
count if !(placeofbirth1 == placeofbirth2)

* ROYAL
* Coder 1: encode, shift encoded values 2 and 3 down to 1 and 2, and drop the
* value label, which no longer matches the recoded values
encode royal1, gen(royal11)
fre royal11
recode royal11 (2 = 1) (3 = 2)
label drop royal11
fre royal11

* Coder 2: encoded value 3 becomes missing, 4 and 5 become 1 and 2
encode royal2, gen(royal22)
fre royal22
recode royal22 (3 = .) (4 = 1) (5 = 2)

* Full-sample kappa
kap royal11 royal22

* Kappa within each type of country (group == 1 first, then group == 0)
foreach grp in income pop dem {
    foreach val in 1 0 {
        kap royal11 royal22 if `grp'==`val'
    }
}



* POLITICALFAMILY
* Coder 1: encode, drop the value label, then shift encoded values 2 and 3 down to 1 and 2
encode politicalfamily1, gen(politicalfamily11)
fre politicalfamily11
label drop politicalfamily11
recode politicalfamily11 (2 = 1) (3 = 2)
fre politicalfamily11

* Coder 2: encoded value 3 becomes missing, 4 and 5 become 1 and 2
encode politicalfamily2, gen(politicalfamily22)
fre politicalfamily22
recode politicalfamily22 (3 = .) (4 = 1) (5 = 2)
fre politicalfamily22

* Full-sample kappa
kap politicalfamily11 politicalfamily22

* Kappa within each type of country (group == 1 first, then group == 0)
foreach grp in income pop dem {
    foreach val in 1 0 {
        kap politicalfamily11 politicalfamily22 if `grp'==`val'
    }
}


* CLASS
encode class1, gen(class11)
fre class11
encode class2, gen(class22)
fre class22
replace class22 = . if class22==7   // encoded value 7 is treated as missing for coder 2

kap class11 class22
kap class11 class22, wgt(w)

* Split class background into its two dimensions and test each one separately.

* Dimension 1 -- hierarchy, built from the encoded class value:
*   1 = upper class   (3 = primary sector, 6 = other sectors)
*   2 = middle class  (2 = primary sector, 5 = other sectors)
*   3 = working class (1 = primary sector, 4 = other sectors)
* Any other value (including missing) stays missing.
foreach coder in 1 2 {
    local src class`coder'`coder'
    gen classhierarchy`coder' = cond(inlist(`src', 3, 6), 1, ///
                                cond(inlist(`src', 2, 5), 2, ///
                                cond(inlist(`src', 1, 4), 3, .)))
}

tab class1 classhierarchy1
tab class2 classhierarchy2

tab class11 classhierarchy1

kap classhierarchy1 classhierarchy2
kap classhierarchy1 classhierarchy2, wgt(w)

* Unweighted and weighted kappa within each type of country (group == 1 first, then 0)
foreach grp in income pop dem {
    foreach val in 1 0 {
        kap classhierarchy1 classhierarchy2 if `grp'==`val'
        kap classhierarchy1 classhierarchy2 if `grp'==`val', wgt(w)
    }
}


* Dimension 2 -- sector: 1 = primary (class values 1-3), 2 = other sectors (4-6);
* any other value stays missing.
foreach coder in 1 2 {
    local src class`coder'`coder'
    gen classector`coder' = cond(inlist(`src', 1, 2, 3), 1, ///
                            cond(inlist(`src', 4, 5, 6), 2, .))
}

tab class11 classector1
tab class22 classector2

kap classector1 classector2

* Kappa within each type of country (group == 1 first, then group == 0)
foreach grp in income pop dem {
    foreach val in 1 0 {
        kap classector1 classector2 if `grp'==`val'
    }
}


* OCCUPATION
encode occupation1, gen(occupation11)
encode occupation2, gen(occupation22)
* encode hands out different numeric values for occupation11 and occupation22,
* so comparable codes are rebuilt by hand further down
fre occupation22
fre occupation11
replace occupation22 = . if occupation22==22

tab occupation11 occupation22

tab occupation1
tab occupation_text1 occupation1

* occ1 / occ2: numeric versions of the string codes "1" through "23"
* (1 = academic, 2 = student). Any other string leaves the value missing.
foreach coder in 1 2 {
    gen occ`coder' = .
    forvalues code = 1/23 {
        quietly replace occ`coder' = `code' if occupation`coder' == "`code'"
    }
}

tab occ1
tab occ2

* Full-sample kappa
kap occ1 occ2

* Kappa within each type of country (group == 1 first, then group == 0)
foreach grp in income pop dem {
    foreach val in 1 0 {
        kap occ1 occ2 if `grp'==`val'
    }
}




* POLITICIAN
encode politician1, gen(politician11)
fre politician11
encode politician2, gen(politician22)
fre politician22
replace politician22 = . if politician22==4   // encoded value 4 is treated as missing for coder 2

* Full sample: unweighted kappa, then weighted kappa
kap politician11 politician22
kap politician11 politician22, wgt(w)
tab politician_text1

fre politician2

* Cross-check each coder's category against the accompanying free-text entry
tab politician11 politician_text1
tab politician22 politician_text2


* Kappa within each type of country (group == 1 first, then group == 0)
foreach grp in income pop dem {
    foreach val in 1 0 {
        kap politician11 politician22 if `grp'==`val'
    }
}






